This code analyses splitting statistics for CTC clusters.
The analysis takes as input a list of trees sampled from their posterior distribution and, for each tree, samples mutation placements.
# ---- Run configuration ----

# Name of the data set to analyse; also the prefix of the simulated data sets
# (the simulated names are built as "<treeName>_<clusterSize>").
treeName <- "Br16_B"

# Sampling depth forwarded to every computeClusterSplits() call.
nTreeSamplingEvents <- 1000
nMutationSamplingEvents <- 1000

# Folder holding the observed data and folder holding the simulated data sets
# that are used to derive the null distributions.
inputFolder <- "/Users/jgawron/Documents/projects/CTC_backup/input_folder"
simulationInputFolder <- "/Users/jgawron/Documents/projects/CTC_backup/simulations/simulations2"

# Project helpers; presumably defines load_data() and computeClusterSplits(),
# which are not defined in this file. Sourcing it attaches the tidyverse.
source("/Users/jgawron/Documents/projects/CTC-SCITE/CTC-SCITE/experiments/workflow/resources/functions.R")
## ── Attaching core tidyverse packages ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Load the observed data set for `treeName` from `inputFolder`. The result is a
# list whose components (postSampling, nCells, nMutations, ...) are unpacked
# into top-level variables right after this call. load_data() reads three
# tab-delimited tables, hence the three readr column-specification messages.
input <- load_data(inputFolder, treeName)
## Rows: 40000 Columns: 5
## ── Column specification ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (1): Tree
## dbl (4): LogScore, SequencingErrorRate, DropoutRate, LogTau
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 557 Columns: 112
## ── Column specification ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): X1, X3, X4
## dbl (109): X2, X5, X6, X7, X8, X9, X10, X11, X12, X13, X14, X15, X16, X17, X...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 54 Columns: 5
## ── Column specification ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (2): Cluster, Description
## dbl (3): CellCount, TCs, WBCs
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Unpack the components of the loaded data set into top-level variables so
# they can be passed individually to computeClusterSplits().
# (`nClusters` used to be assigned twice; the redundant copy was dropped.)
postSampling <- input$postSampling
ClusterID <- input$clusterID
nCells <- input$nCells
nMutations <- input$nMutations
nClusters <- input$nClusters
alleleCount <- input$alleleCount
mutatedReadCounts <- input$mutatedReadCounts
totalReadCounts <- input$totalReadCounts
# Per-cell annotation table (Cluster, ClusterName, WBC, color, single_cell).
sampleDescription <- input$sample_description
Each row corresponds to a cell. Column description:
- Cluster: A number indicating which sample the cell belongs to.
- ClusterName: The name of the sample in the nodeDescription.tsv file.
- WBC: A binary indicator of whether the cell is a white blood cell (1) or not (0).
- color: The color of the cluster in the tree, as described in the nodeDescription.tsv file.
- single_cell: Appears to indicate whether the sample consists of a single cell (TRUE) or of several cells (FALSE).
# Show the per-cell annotation table (one row per cell).
print(sampleDescription)
## Cluster ClusterName WBC color single_cell
## 1 0 Br16_B10 0 deeppink2 FALSE
## 2 0 Br16_B10 0 deeppink2 FALSE
## 3 0 Br16_B10 0 deeppink2 FALSE
## 4 1 Br16_B11 0 lightpink2 FALSE
## 5 1 Br16_B11 0 lightpink2 FALSE
## 6 1 Br16_B11 0 lightpink2 FALSE
## 7 2 Br16_B12 0 gray93 TRUE
## 8 3 Br16_B15 0 orangered4 TRUE
## 9 4 Br16_B16 0 orangered4 FALSE
## 10 4 Br16_B16 0 orangered4 FALSE
## 11 5 Br16_B17 0 gray93 TRUE
## 12 6 Br16_B18 0 peachpuff1 FALSE
## 13 6 Br16_B18 0 peachpuff1 FALSE
## 14 7 Br16_B19 0 peachpuff1 FALSE
## 15 7 Br16_B19 0 peachpuff1 FALSE
## 16 7 Br16_B19 0 peachpuff1 FALSE
## 17 7 Br16_B19 0 peachpuff1 FALSE
## 18 8 Br16_B2 0 gray93 TRUE
## 19 9 Br16_B20 0 yellow4 FALSE
## 20 9 Br16_B20 0 yellow4 FALSE
## 21 9 Br16_B20 0 yellow4 FALSE
## 22 9 Br16_B20 0 yellow4 FALSE
## 23 9 Br16_B20 0 yellow4 FALSE
## 24 10 Br16_B22 0 sienna2 FALSE
## 25 10 Br16_B22 0 sienna2 FALSE
## 26 10 Br16_B22 0 sienna2 FALSE
## 27 10 Br16_B22 0 sienna2 FALSE
## 28 11 Br16_B23 0 rosybrown4 FALSE
## 29 11 Br16_B23 0 rosybrown4 FALSE
## 30 11 Br16_B23 0 rosybrown4 FALSE
## 31 12 Br16_B24 0 springgreen TRUE
## 32 13 Br16_B25 0 springgreen FALSE
## 33 13 Br16_B25 0 springgreen FALSE
## 34 13 Br16_B25 0 springgreen FALSE
## 35 14 Br16_B26 0 palegreen3 FALSE
## 36 14 Br16_B26 0 palegreen3 FALSE
## 37 14 Br16_B26 0 palegreen3 FALSE
## 38 14 Br16_B26 0 palegreen3 FALSE
## 39 15 Br16_B28 0 orangered FALSE
## 40 15 Br16_B28 0 orangered FALSE
## 41 16 Br16_B29 0 mediumorchid4 FALSE
## 42 16 Br16_B29 0 mediumorchid4 FALSE
## 43 17 Br16_B3 0 gray93 TRUE
## 44 18 Br16_B30 0 mediumorchid4 TRUE
## 45 19 Br16_B31 0 mediumorchid4 FALSE
## 46 19 Br16_B31 0 mediumorchid4 FALSE
## 47 19 Br16_B31 0 mediumorchid4 FALSE
## 48 19 Br16_B31 0 mediumorchid4 FALSE
## 49 20 Br16_B32 0 gray93 TRUE
## 50 21 Br16_B33 0 gray93 TRUE
## 51 22 Br16_B34 0 gray93 TRUE
## 52 23 Br16_B35 0 gray93 TRUE
## 53 24 Br16_B36 0 gray93 TRUE
## 54 25 Br16_B37 0 gray93 TRUE
## 55 26 Br16_B39 0 honeydew1 FALSE
## 56 26 Br16_B39 0 honeydew1 FALSE
## 57 27 Br16_B4 0 burlywood4 FALSE
## 58 27 Br16_B4 0 burlywood4 FALSE
## 59 28 Br16_B40 0 palegoldenrod FALSE
## 60 28 Br16_B40 0 palegoldenrod FALSE
## 61 29 Br16_B41 0 gray93 TRUE
## 62 30 Br16_B42 0 gray93 TRUE
## 63 31 Br16_B43 0 hotpink FALSE
## 64 31 Br16_B43 0 hotpink FALSE
## 65 31 Br16_B43 0 hotpink FALSE
## 66 32 Br16_B44 0 goldenrod FALSE
## 67 32 Br16_B44 0 goldenrod FALSE
## 68 32 Br16_B44 0 goldenrod FALSE
## 69 33 Br16_B45 0 tan FALSE
## 70 33 Br16_B45 0 tan FALSE
## 71 33 Br16_B45 0 tan FALSE
## 72 34 Br16_B46 0 tan FALSE
## 73 34 Br16_B46 0 tan FALSE
## 74 34 Br16_B46 0 tan FALSE
## 75 35 Br16_B48 0 brown4 FALSE
## 76 35 Br16_B48 0 brown4 FALSE
## 77 35 Br16_B48 0 brown4 FALSE
## 78 36 Br16_B49 0 gray93 TRUE
## 79 37 Br16_B5 0 yellow FALSE
## 80 37 Br16_B5 0 yellow FALSE
## 81 38 Br16_B50 0 gray93 TRUE
## 82 39 Br16_B51 0 gray93 TRUE
## 83 40 Br16_B52 0 gray93 TRUE
## 84 41 Br16_B53 0 wheat FALSE
## 85 41 Br16_B53 0 wheat FALSE
## 86 41 Br16_B53 0 wheat FALSE
## 87 42 Br16_B54 0 gray93 TRUE
## 88 43 Br16_B55 0 lawngreen FALSE
## 89 43 Br16_B55 0 lawngreen FALSE
## 90 43 Br16_B55 0 lawngreen FALSE
## 91 44 Br16_B56 0 indianred FALSE
## 92 44 Br16_B56 0 indianred FALSE
## 93 45 Br16_B57 0 turquoise4 FALSE
## 94 45 Br16_B57 0 turquoise4 FALSE
## 95 46 Br16_B58 0 turquoise4 FALSE
## 96 46 Br16_B58 0 turquoise4 FALSE
## 97 46 Br16_B58 0 turquoise4 FALSE
## 98 46 Br16_B58 0 turquoise4 FALSE
## 99 47 Br16_B59 0 gray93 TRUE
## 100 48 Br16_B6 0 tomato FALSE
## 101 48 Br16_B6 0 tomato FALSE
## 102 49 Br16_B60 0 gray93 TRUE
## 103 50 Br16_B61 0 red FALSE
## 104 50 Br16_B61 0 red FALSE
## 105 50 Br16_B61 0 red FALSE
## 106 51 Br16_B7 0 blue FALSE
## 107 51 Br16_B7 0 blue FALSE
## 108 52 Br16_B8 0 gray93 TRUE
## 109 53 Br16_B9 0 gray93 TRUE
Get null distributions of the relevant statistics from the simulated data sets, stratified by cluster size:
# Null cutoffs per cluster size; one row is appended per successfully
# processed simulated data set.
cutoffsSplittingProbs <- data.frame(clusterSize = vector(), Cutoff = vector())
cutoffsBranchingProbabilities <- data.frame(clusterSize = vector(), Cutoff = vector())

for (clusterSize in 2:5) {
  # A failure for one cluster size (e.g. a missing simulation) must not stop
  # the remaining sizes, but it is reported explicitly instead of being left
  # to a bare try().
  tryCatch(
    {
      # Simulated data sets are named "<treeName>_<clusterSize>".
      treeNameSimulated <- paste(treeName, clusterSize, sep = '_')
      inputSimulated <- load_data(simulationInputFolder, treeNameSimulated)

      postSamplingSimulated <- inputSimulated$postSampling
      ClusterIDSimulated <- inputSimulated$clusterID
      nCellsSimulated <- inputSimulated$nCells
      nMutationsSimulated <- inputSimulated$nMutations
      nClustersSimulated <- inputSimulated$nClusters
      alleleCountSimulated <- inputSimulated$alleleCount
      mutatedReadCountsSimulated <- inputSimulated$mutatedReadCounts
      totalReadCountsSimulated <- inputSimulated$totalReadCounts
      sampleDescriptionSimulated <- inputSimulated$sample_description

      # Restrict the computation to the clusters carrying one of the listed
      # colors (presumably the colors assigned to the simulated clusters).
      distance <- computeClusterSplits(
        sampleDescriptionSimulated, postSamplingSimulated, treeNameSimulated, nCellsSimulated,
        nMutationsSimulated, nClustersSimulated,
        alleleCountSimulated,
        mutatedReadCountsSimulated, totalReadCountsSimulated,
        nMutationSamplingEvents = nMutationSamplingEvents,
        nTreeSamplingEvents = nTreeSamplingEvents,
        cellPairSelection = c("orchid", "orchid1", "orchid2",
                              "orchid3", "orchid4", "darkorchid",
                              "darkorchid1", "darkorchid2", "darkorchid3",
                              "darkorchid4", "purple", "purple1",
                              "purple2", "purple3", "purple4")
      )

      # Method 1: cutoff = mean + 2 * sd of the null splitting probabilities.
      plot(ggplot(distance$splittingProbs, aes(x = "Values", y = Splitting_probability, fill = 'Splitting_probability')) +
        geom_boxplot())
      nullSplittingProbs <- distance$splittingProbs$Splitting_probability
      # NOTE(review): mean + 2 * sd is not bounded by 1. In the recorded run the
      # size-2 cutoff was 1.021147; if Splitting_probability lies in [0, 1], no
      # size-2 cluster can ever exceed it. Confirm that this is intended.
      cutoffsSplittingProbs <- rbind(
        cutoffsSplittingProbs,
        data.frame(clusterSize = clusterSize, Cutoff = mean(nullSplittingProbs) + 2 * sd(nullSplittingProbs))
      )

      # Note: aggregatedBranchingProbabilities takes all pairs of cells from the
      # same cluster into account, so clusters with more cells are counted more
      # often and contribute more to the shape of the final distribution. This
      # is no problem right now because only clusters of the same size are
      # aggregated, but it is a potential source of a future bug.
      plot(ggplot(data.frame(x = distance$aggregatedBranchingProbabilities), aes(x = x)) +
        geom_histogram(binwidth = 0.01))

      # Method 2: cutoff = 95% quantile of the null branching probabilities.
      # The row is built once and used for both the printout and the table.
      branchingCutoffRow <- data.frame(
        clusterSize = clusterSize,
        Cutoff = quantile(distance$aggregatedBranchingProbabilities, probs = 0.95, names = FALSE)[1]
      )
      print(branchingCutoffRow)
      cutoffsBranchingProbabilities <- rbind(cutoffsBranchingProbabilities, branchingCutoffRow)
    },
    error = function(e) {
      warning(
        "Null distribution for cluster size ", clusterSize,
        " could not be computed: ", conditionMessage(e),
        call. = FALSE
      )
    }
  )
}
## Rows: 21619 Columns: 5
## ── Column specification ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (1): Tree
## dbl (4): LogScore, SequencingErrorRate, DropoutRate, LogTau
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 557 Columns: 120
## ── Column specification ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): X1, X3, X4
## dbl (117): X2, X5, X6, X7, X8, X9, X10, X11, X12, X13, X14, X15, X16, X17, X...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 58 Columns: 5
## ── Column specification ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (2): Cluster, Description
## dbl (3): CellCount, TCs, WBCs
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "Computing genomic distances of leaves: 110 109"
## [1] "Computing the posterior distribution"
## [1] "Computing genomic distances of leaves: 112 111"
## [1] "Computing the posterior distribution"
## [1] "Computing genomic distances of leaves: 114 113"
## [1] "Computing the posterior distribution"
## [1] "Computing genomic distances of leaves: 116 115"
## [1] "Computing the posterior distribution"
## clusterSize Cutoff
## 1 2 1
## Rows: 21449 Columns: 5
## ── Column specification ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (1): Tree
## dbl (4): LogScore, SequencingErrorRate, DropoutRate, LogTau
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 557 Columns: 118
## ── Column specification ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): X1, X3, X4
## dbl (115): X2, X5, X6, X7, X8, X9, X10, X11, X12, X13, X14, X15, X16, X17, X...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 57 Columns: 5
## ── Column specification ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (2): Cluster, Description
## dbl (3): CellCount, TCs, WBCs
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "Computing genomic distances of leaves: 110 109"
## [1] "Computing the posterior distribution"
## [1] "Computing genomic distances of leaves: 113 112"
## [1] "Computing the posterior distribution"
## [1] "Computing genomic distances of leaves: 116 115"
## [1] "Computing the posterior distribution"
## clusterSize Cutoff
## 1 3 0.5
## Rows: 20069 Columns: 5
## ── Column specification ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (1): Tree
## dbl (4): LogScore, SequencingErrorRate, DropoutRate, LogTau
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 557 Columns: 116
## ── Column specification ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): X1, X3, X4
## dbl (113): X2, X5, X6, X7, X8, X9, X10, X11, X12, X13, X14, X15, X16, X17, X...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 56 Columns: 5
## ── Column specification ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (2): Cluster, Description
## dbl (3): CellCount, TCs, WBCs
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "Computing genomic distances of leaves: 110 109"
## [1] "Computing the posterior distribution"
## [1] "Computing genomic distances of leaves: 114 113"
## [1] "Computing the posterior distribution"
## clusterSize Cutoff
## 1 4 0.6471334
## Rows: 19190 Columns: 5
## ── Column specification ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (1): Tree
## dbl (4): LogScore, SequencingErrorRate, DropoutRate, LogTau
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 557 Columns: 116
## ── Column specification ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): X1, X3, X4
## dbl (113): X2, X5, X6, X7, X8, X9, X10, X11, X12, X13, X14, X15, X16, X17, X...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 56 Columns: 5
## ── Column specification ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (2): Cluster, Description
## dbl (3): CellCount, TCs, WBCs
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "Computing genomic distances of leaves: 110 109"
## [1] "Computing the posterior distribution"
## [1] "Computing genomic distances of leaves: 115 114"
## [1] "Computing the posterior distribution"
## clusterSize Cutoff
## 1 5 0.7549453
Get the relevant statistics for each of the clusters of a dataset and output numbers of oligoclonal clusters:
# Counters and per-cluster result tables for the two oligoclonality criteria:
#   method 1 - mean splitting probability above the null mean + 2 * sd cutoff
#   method 2 - mean branching probability above the null 95% quantile cutoff
nTumorClusters <- 0
nOligoclonalClusters1 <- 0
nOligoclonalClusters2 <- 0
splittingSummary1 <- data.frame(Color = vector(), Oligoclonal = vector(), ClusterSize = vector())
splittingSummary2 <- data.frame(Color = vector(), Oligoclonal = vector(), ClusterSize = vector())

for (clusterSize in 2:5) {
  # Errors for one cluster size are reported and the loop continues with the
  # next size (previously a bare try()).
  tryCatch(
    {
      # Colors of non-WBC clusters with exactly `clusterSize` cells. Clusters
      # are identified by their color; 'gray93' is excluded (in the printed
      # annotation table all gray93 rows are single cells).
      clusterColor <- sampleDescription %>%
        filter(WBC == 0 & color != 'gray93') %>%
        group_by(color) %>%
        filter(n() == clusterSize) %>%
        pull(color) %>%
        unique()

      # The cutoffs depend only on the cluster size, so look them up once.
      splittingCutoff <- cutoffsSplittingProbs[cutoffsSplittingProbs$clusterSize == clusterSize, 2]
      branchingCutoff <- cutoffsBranchingProbabilities[cutoffsBranchingProbabilities$clusterSize == clusterSize, 2]

      # Without exactly one cutoff per method the comparisons below would fail
      # with a cryptic zero-length `if` error, and only after the expensive
      # computation; fail early with a clear message instead.
      if (length(clusterColor) > 0 &&
            (length(splittingCutoff) != 1 || length(branchingCutoff) != 1)) {
        stop("no unique null cutoff available for cluster size ", clusterSize, call. = FALSE)
      }

      for (color in clusterColor) {
        distance <- computeClusterSplits(
          sampleDescription, postSampling, treeName, nCells,
          nMutations, nClusters,
          alleleCount,
          mutatedReadCounts, totalReadCounts,
          nMutationSamplingEvents = nMutationSamplingEvents,
          nTreeSamplingEvents = nTreeSamplingEvents,
          cellPairSelection = c(color)
        )
        splittingProbs <- mean(distance$splittingProbs$Splitting_probability)
        branchingProbs <- mean(distance$aggregatedBranchingProbabilities)
        nTumorClusters <- nTumorClusters + 1

        # Progress output: cluster size and the method-1 cutoff in use.
        print(clusterSize)
        print(splittingCutoff)

        # Method 1.
        oligoclonal <- splittingProbs > splittingCutoff
        if (oligoclonal) {
          nOligoclonalClusters1 <- nOligoclonalClusters1 + 1
        }
        splittingSummary1 <- rbind(
          splittingSummary1,
          data.frame(Color = color, Oligoclonal = oligoclonal, ClusterSize = clusterSize)
        )

        # Method 2.
        oligoclonal <- branchingProbs > branchingCutoff
        if (oligoclonal) {
          nOligoclonalClusters2 <- nOligoclonalClusters2 + 1
        }
        splittingSummary2 <- rbind(
          splittingSummary2,
          data.frame(Color = color, Oligoclonal = oligoclonal, ClusterSize = clusterSize)
        )
      }
    },
    error = function(e) {
      warning(
        "Clusters of size ", clusterSize,
        " could not be fully evaluated: ", conditionMessage(e),
        call. = FALSE
      )
    }
  )
}
## [1] "Computing genomic distances of leaves: 39 38"
## [1] "Computing the posterior distribution"
## [1] 2
## [1] 1.021147
## [1] "Computing genomic distances of leaves: 55 54"
## [1] "Computing the posterior distribution"
## [1] 2
## [1] 1.021147
## [1] "Computing genomic distances of leaves: 57 56"
## [1] "Computing the posterior distribution"
## [1] 2
## [1] 1.021147
## [1] "Computing genomic distances of leaves: 59 58"
## [1] "Computing the posterior distribution"
## [1] 2
## [1] 1.021147
## [1] "Computing genomic distances of leaves: 79 78"
## [1] "Computing the posterior distribution"
## [1] 2
## [1] 1.021147
## [1] "Computing genomic distances of leaves: 91 90"
## [1] "Computing the posterior distribution"
## [1] 2
## [1] 1.021147
## [1] "Computing genomic distances of leaves: 100 99"
## [1] "Computing the posterior distribution"
## [1] 2
## [1] 1.021147
## [1] "Computing genomic distances of leaves: 106 105"
## [1] "Computing the posterior distribution"
## [1] 2
## [1] 1.021147
## [1] "Computing genomic distances of leaves: 1 0"
## [1] "Computing the posterior distribution"
## [1] 3
## [1] 0.876659
## [1] "Computing genomic distances of leaves: 4 3"
## [1] "Computing the posterior distribution"
## [1] 3
## [1] 0.876659
## [1] "Computing genomic distances of leaves: 8 7"
## [1] "Computing the posterior distribution"
## [1] 3
## [1] 0.876659
## [1] "Computing genomic distances of leaves: 28 27"
## [1] "Computing the posterior distribution"
## [1] 3
## [1] 0.876659
## [1] "Computing genomic distances of leaves: 63 62"
## [1] "Computing the posterior distribution"
## [1] 3
## [1] 0.876659
## [1] "Computing genomic distances of leaves: 66 65"
## [1] "Computing the posterior distribution"
## [1] 3
## [1] 0.876659
## [1] "Computing genomic distances of leaves: 75 74"
## [1] "Computing the posterior distribution"
## [1] 3
## [1] 0.876659
## [1] "Computing genomic distances of leaves: 84 83"
## [1] "Computing the posterior distribution"
## [1] 3
## [1] 0.876659
## [1] "Computing genomic distances of leaves: 88 87"
## [1] "Computing the posterior distribution"
## [1] 3
## [1] 0.876659
## [1] "Computing genomic distances of leaves: 103 102"
## [1] "Computing the posterior distribution"
## [1] 3
## [1] 0.876659
## [1] "Computing genomic distances of leaves: 24 23"
## [1] "Computing the posterior distribution"
## [1] 4
## [1] 0.7895177
## [1] "Computing genomic distances of leaves: 31 30"
## [1] "Computing the posterior distribution"
## [1] 4
## [1] 0.7895177
## [1] "Computing genomic distances of leaves: 35 34"
## [1] "Computing the posterior distribution"
## [1] 4
## [1] 0.7895177
## [1] "Computing genomic distances of leaves: 19 18"
## [1] "Computing the posterior distribution"
## [1] 5
## [1] 0.7751354
# Number of distinct non-WBC, non-gray93 colors that occur on more than one
# cell, i.e. the number of multi-cell tumor clusters in the data set.
# NOTE(review): this counts clusters of any size > 1, whereas the loop above
# only evaluates sizes 2 to 5, so larger clusters enter the denominator
# without having been tested. Confirm that this is the intended denominator.
numberOfCancerClusters <- sampleDescription %>%
  filter(WBC == 0, color != 'gray93') %>%
  count(color) %>%
  filter(n > 1) %>%
  nrow()

# Report the method-1 result (splitting-probability cutoff).
print(
  sprintf(
    '%d out of %d clusters were found to be oligoclonal in %s, using method 1',
    nOligoclonalClusters1, numberOfCancerClusters, treeName
  )
)
## [1] "11 out of 26 clusters were found to be oligoclonal in Br16_B, using method 1"
# Report the method-2 result (branching-probability cutoff), using the same
# denominator as the method-1 report.
print(sprintf('%d out of %d clusters were found to be oligoclonal in %s, using method 2', nOligoclonalClusters2, numberOfCancerClusters, treeName))
## [1] "6 out of 26 clusters were found to be oligoclonal in Br16_B, using method 2"
# Per-cluster calls of method 1: one row per evaluated cluster color.
print(splittingSummary1)
## Color Oligoclonal ClusterSize
## 1 orangered FALSE 2
## 2 honeydew1 FALSE 2
## 3 burlywood4 FALSE 2
## 4 palegoldenrod FALSE 2
## 5 yellow FALSE 2
## 6 indianred FALSE 2
## 7 tomato FALSE 2
## 8 blue FALSE 2
## 9 deeppink2 TRUE 3
## 10 lightpink2 TRUE 3
## 11 orangered4 TRUE 3
## 12 rosybrown4 TRUE 3
## 13 hotpink TRUE 3
## 14 goldenrod TRUE 3
## 15 brown4 FALSE 3
## 16 wheat TRUE 3
## 17 lawngreen TRUE 3
## 18 red FALSE 3
## 19 sienna2 TRUE 4
## 20 springgreen TRUE 4
## 21 palegreen3 TRUE 4
## 22 yellow4 FALSE 5
# Per-cluster calls of method 2: one row per evaluated cluster color.
print(splittingSummary2)
## Color Oligoclonal ClusterSize
## 1 orangered FALSE 2
## 2 honeydew1 FALSE 2
## 3 burlywood4 FALSE 2
## 4 palegoldenrod FALSE 2
## 5 yellow FALSE 2
## 6 indianred FALSE 2
## 7 tomato FALSE 2
## 8 blue FALSE 2
## 9 deeppink2 TRUE 3
## 10 lightpink2 TRUE 3
## 11 orangered4 TRUE 3
## 12 rosybrown4 TRUE 3
## 13 hotpink FALSE 3
## 14 goldenrod TRUE 3
## 15 brown4 FALSE 3
## 16 wheat TRUE 3
## 17 lawngreen FALSE 3
## 18 red FALSE 3
## 19 sienna2 FALSE 4
## 20 springgreen FALSE 4
## 21 palegreen3 FALSE 4
## 22 yellow4 FALSE 5